library (MASS)
library (factoextra)
library (ggplot2)
library (readr)
library (dplyr)
library (caret)
library (tibble)
# Location of the CSV export on data.world that this script analyses.
url <- "https://query.data.world/s/ll77ildgnhhove7mlker3g2jw7z5qr?dws=00000"

# Read the remote CSV into a data frame. The first row supplies the column
# names, and text columns are kept as character vectors, not factors.
data <- read.csv(
  file = url,
  header = TRUE,
  stringsAsFactors = FALSE
)

# Print a compact overview of the loaded columns and their types.
str(object = data)
'data.frame': 8618 obs. of 45 variables:
$ ID : int 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 ...
$ FoodGroup : chr "Dairy and Egg Products" "Dairy and Egg Products" "Dairy and Egg Products" "Dairy and Egg Products" ...
$ ShortDescrip : chr "BUTTER,WITH SALT" "BUTTER,WHIPPED,WITH SALT" "BUTTER OIL,ANHYDROUS" "CHEESE,BLUE" ...
$ Descrip : chr "Butter, salted" "Butter, whipped, with salt" "Butter oil, anhydrous" "Cheese, blue" ...
$ CommonName : chr "" "" "" "" ...
$ MfgName : chr "" "" "" "" ...
$ ScientificName : chr "" "" "" "" ...
$ Energy_kcal : num 717 717 876 353 371 334 300 376 406 387 ...
$ Protein_g : num 0.85 0.85 0.28 21.4 23.24 ...
$ Fat_g : num 81.1 81.1 99.5 28.7 29.7 ...
$ Carb_g : num 0.06 0.06 0 2.34 2.79 0.45 0.46 3.06 1.33 4.78 ...
$ Sugar_g : num 0.06 0.06 0 0.5 0.51 0.45 0.46 0 0.28 0 ...
$ Fiber_g : num 0 0 0 0 0 0 0 0 0 0 ...
$ VitA_mcg : num 684 684 840 198 292 174 241 271 263 233 ...
$ VitB6_mg : num 0.003 0.003 0.001 0.166 0.065 0.235 0.227 0.074 0.049 0.074 ...
$ VitB12_mcg : num 0.17 0.13 0.01 1.22 1.26 1.65 1.3 0.27 0.88 0.83 ...
$ VitC_mg : num 0 0 0 0 0 0 0 0 0 0 ...
$ VitE_mg : num 2.32 2.32 2.8 0.25 0.26 0.24 0.21 0 0.78 0 ...
$ Folate_mcg : num 3 3 0 36 20 65 62 18 26 18 ...
$ Niacin_mg : num 0.042 0.042 0.003 1.016 0.118 ...
$ Riboflavin_mg : num 0.034 0.034 0.005 0.382 0.351 0.52 0.488 0.45 0.434 0.293 ...
$ Thiamin_mg : num 0.005 0.005 0.001 0.029 0.014 0.07 0.028 0.031 0.027 0.046 ...
$ Calcium_mg : num 24 24 4 528 674 184 388 673 675 643 ...
$ Copper_mcg : num 0 0.016 0.001 0.04 0.024 0.019 0.021 0.024 0.056 0.042 ...
$ Iron_mg : num 0.02 0.16 0 0.31 0.43 0.5 0.33 0.64 0.16 0.21 ...
$ Magnesium_mg : num 2 2 0 23 24 20 20 22 27 21 ...
$ Manganese_mg : num 0 0.004 0 0.009 0.012 0.034 0.038 0.021 0.033 0.012 ...
$ Phosphorus_mg : num 24 23 3 387 451 188 347 490 473 464 ...
$ Selenium_mcg : num 1 1 0 14.5 14.5 14.5 14.5 14.5 28.3 14.5 ...
$ Zinc_mg : num 0.09 0.05 0.01 2.66 2.6 2.38 2.38 2.94 3.43 2.79 ...
$ VitA_USRDA : num 0.76 0.76 0.933 0.22 0.324 ...
$ VitB6_USRDA : num 0.001765 0.001765 0.000588 0.097647 0.038235 ...
$ VitB12_USRDA : num 0.07083 0.05417 0.00417 0.50833 0.525 ...
$ VitC_USRDA : num 0 0 0 0 0 0 0 0 0 0 ...
$ VitE_USRDA : num 0.1547 0.1547 0.1867 0.0167 0.0173 ...
$ Folate_USRDA : num 0.0075 0.0075 0 0.09 0.05 ...
$ Niacin_USRDA : num 0.002625 0.002625 0.000188 0.0635 0.007375 ...
$ Riboflavin_USRDA: num 0.02615 0.02615 0.00385 0.29385 0.27 ...
$ Thiamin_USRDA : num 0.004167 0.004167 0.000833 0.024167 0.011667 ...
$ Calcium_USRDA : num 0.02 0.02 0.00333 0.44 0.56167 ...
$ Copper_USRDA : num 0.00 1.78e-05 1.11e-06 4.44e-05 2.67e-05 ...
$ Magnesium_USRDA : num 0.00476 0.00476 0 0.05476 0.05714 ...
$ Phosphorus_USRDA: num 0.03429 0.03286 0.00429 0.55286 0.64429 ...
$ Selenium_USRDA : num 0.0182 0.0182 0 0.2636 0.2636 ...
$ Zinc_USRDA : num 0.008182 0.004545 0.000909 0.241818 0.236364 ...
ID FoodGroup ShortDescrip Descrip
Min. : 1001 Length:8618 Length:8618 Length:8618
1st Qu.: 8695 Class :character Class :character Class :character
Median :14273 Mode :character Mode :character Mode :character
Mean :15468
3rd Qu.:20051
Max. :93600
CommonName MfgName ScientificName Energy_kcal
Length:8618 Length:8618 Length:8618 Min. : 0.0
Class :character Class :character Class :character 1st Qu.: 93.0
Mode :character Mode :character Mode :character Median :191.0
Mean :226.4
3rd Qu.:336.0
Max. :902.0
Protein_g Fat_g Carb_g Sugar_g
Min. : 0.000 Min. : 0.0000 Min. : 0.000 Min. : 0.00
1st Qu.: 2.470 1st Qu.: 0.9925 1st Qu.: 0.040 1st Qu.: 0.00
Median : 8.285 Median : 5.2350 Median : 8.945 Median : 0.37
Mean :11.524 Mean : 10.6470 Mean : 21.819 Mean : 6.56
3rd Qu.:19.977 3rd Qu.: 13.9000 3rd Qu.: 32.900 3rd Qu.: 5.30
Max. :88.320 Max. :100.0000 Max. :100.000 Max. :99.80
Fiber_g VitA_mcg VitB6_mg VitB12_mcg
Min. : 0.000 Min. : 0.00 Min. : 0.00000 Min. : 0.000
1st Qu.: 0.000 1st Qu.: 0.00 1st Qu.: 0.03425 1st Qu.: 0.000
Median : 0.300 Median : 1.50 Median : 0.12000 Median : 0.080
Mean : 2.023 Mean : 93.97 Mean : 0.26437 Mean : 1.225
3rd Qu.: 2.400 3rd Qu.: 21.00 3rd Qu.: 0.35500 3rd Qu.: 1.298
Max. :79.000 Max. :30000.00 Max. :12.00000 Max. :98.890
VitC_mg VitE_mg Folate_mcg Niacin_mg
Min. : 0.000 Min. : 0.0000 Min. : 0.00 Min. : 0.000
1st Qu.: 0.000 1st Qu.: 0.0000 1st Qu.: 0.00 1st Qu.: 0.382
Median : 0.000 Median : 0.1100 Median : 7.00 Median : 2.100
Mean : 7.925 Mean : 0.8723 Mean : 50.31 Mean : 3.412
3rd Qu.: 2.500 3rd Qu.: 0.4500 3rd Qu.: 25.00 3rd Qu.: 5.029
Max. :2400.000 Max. :149.4000 Max. :5881.00 Max. :127.500
Riboflavin_mg Thiamin_mg Calcium_mg Copper_mcg
Min. : 0.0000 Min. : 0.0000 Min. : 0.00 Min. : 0.0000
1st Qu.: 0.0460 1st Qu.: 0.0300 1st Qu.: 9.00 1st Qu.: 0.0300
Median : 0.1500 Median : 0.0775 Median : 19.00 Median : 0.0790
Mean : 0.2372 Mean : 0.2099 Mean : 73.41 Mean : 0.1722
3rd Qu.: 0.2600 3rd Qu.: 0.2230 3rd Qu.: 62.00 3rd Qu.: 0.1447
Max. :17.5000 Max. :23.3750 Max. :7364.00 Max. :15.0500
Iron_mg Magnesium_mg Manganese_mg Phosphorus_mg
Min. : 0.000 Min. : 0.00 Min. : 0.0000 Min. : 0
1st Qu.: 0.530 1st Qu.: 10.00 1st Qu.: 0.0000 1st Qu.: 37
Median : 1.330 Median : 20.00 Median : 0.0220 Median : 133
Mean : 2.697 Mean : 32.75 Mean : 0.5044 Mean : 156
3rd Qu.: 2.580 3rd Qu.: 29.00 3rd Qu.: 0.2220 3rd Qu.: 216
Max. :123.600 Max. :781.00 Max. :328.0000 Max. :9918
Selenium_mcg Zinc_mg VitA_USRDA VitB6_USRDA
Min. : 0.00 Min. : 0.000 Min. : 0.00000 Min. :0.00000
1st Qu.: 0.10 1st Qu.: 0.230 1st Qu.: 0.00000 1st Qu.:0.02015
Median : 3.90 Median : 0.845 Median : 0.00167 Median :0.07059
Mean : 12.61 Mean : 1.970 Mean : 0.10441 Mean :0.15551
3rd Qu.: 21.30 3rd Qu.: 2.700 3rd Qu.: 0.02333 3rd Qu.:0.20882
Max. :1917.00 Max. :90.950 Max. :33.33333 Max. :7.05882
VitB12_USRDA VitC_USRDA VitE_USRDA Folate_USRDA
Min. : 0.00000 Min. : 0.00000 Min. :0.000000 Min. : 0.0000
1st Qu.: 0.00000 1st Qu.: 0.00000 1st Qu.:0.000000 1st Qu.: 0.0000
Median : 0.03333 Median : 0.00000 Median :0.007333 Median : 0.0175
Mean : 0.51052 Mean : 0.08806 Mean :0.058155 Mean : 0.1258
3rd Qu.: 0.54062 3rd Qu.: 0.02778 3rd Qu.:0.030000 3rd Qu.: 0.0625
Max. :41.20417 Max. :26.66667 Max. :9.960000 Max. :14.7025
Niacin_USRDA Riboflavin_USRDA Thiamin_USRDA Calcium_USRDA
Min. :0.00000 Min. : 0.00000 Min. : 0.00000 Min. :0.00000
1st Qu.:0.02388 1st Qu.: 0.03538 1st Qu.: 0.02500 1st Qu.:0.00750
Median :0.13125 Median : 0.11539 Median : 0.06458 Median :0.01583
Mean :0.21322 Mean : 0.18250 Mean : 0.17488 Mean :0.06118
3rd Qu.:0.31433 3rd Qu.: 0.20000 3rd Qu.: 0.18583 3rd Qu.:0.05167
Max. :7.96875 Max. :13.46154 Max. :19.47917 Max. :6.13667
Copper_USRDA Magnesium_USRDA Phosphorus_USRDA Selenium_USRDA
Min. :0.000e+00 Min. :0.00000 Min. : 0.00000 Min. : 0.00000
1st Qu.:3.333e-05 1st Qu.:0.02381 1st Qu.: 0.05286 1st Qu.: 0.00182
Median :8.778e-05 Median :0.04762 Median : 0.19000 Median : 0.07091
Mean :1.913e-04 Mean :0.07797 Mean : 0.22284 Mean : 0.22936
3rd Qu.:1.608e-04 3rd Qu.:0.06905 3rd Qu.: 0.30857 3rd Qu.: 0.38727
Max. :1.672e-02 Max. :1.85952 Max. :14.16857 Max. :34.85455
Zinc_USRDA
Min. :0.00000
1st Qu.:0.02091
Median :0.07682
Mean :0.17911
3rd Qu.:0.24546
Max. :8.26818